First, let’s load our STM results from Part 1.
load("~/Dropbox (UNC Charlotte)/NCStateSenateFacebook/data/stmFit.RData")
load("~/Dropbox (UNC Charlotte)/NCStateSenateFacebook/data/out.RData")
Let’s explore the size of the topics by their topic proportions.
library(stm)
plot.STM(stmFit, type = "summary", xlim = c(0,.14), n = 5, #labeltype = "frex",
main = "NC State Senators' Topics on Facebook", text.cex = 0.8)
I’ve assigned labels to the topics based on my interpretation of the word-topic probabilities (see next section).
topicNames <- labelTopics(stmFit)
k <- 40
topic <- data.frame(
topicnames = c("Press Conference",
"Local Government",
"Religious Freedom",
"#WeAreNotThis",
"Positive Outlook",
"Pat McCrory",
"Legislation",
"Gerrymandering",
"Supreme Court",
"Voter ID/Fraud",
"Presidential Election",
"Campaign Support",
"Health Care",
"Congressional Elections",
"North Carolina",
"HB2",
"Constituent Newsletter",
"Sen Van Duyn Posts",
"God, Family, Freedom",
"Teacher Pay",
"#NCPOL and #NCGA",
"Hurricane Matthew",
"Redistricting",
"Taxes",
"Congratulations",
"Bathroom Safety",
"Economy/Jobs",
"Student/Women's Issues",
"Hillary Clinton",
"Church",
"Civil Rights",
"Energy Tax Credits",
"Roy Cooper",
"Get Out the Vote",
"Gun Violence",
"Berger Press Releases",
"Public Assistance",
"Town Hall Events",
"Holiday Wishes",
"Conservative Values"),
TopicNumber = 1:k,
TopicProportions = colMeans(stmFit$theta))
par(mfrow = c(4,2),mar = c(1, 1, 2, 1))
for (i in 1:k){
plot.STM(stmFit, type = "labels", n = 15,
topics = i, main = paste0(topic$topicnames[i]," - Raw Probabilities"),
width = 55)
plot.STM(stmFit, type = "labels", n = 15,
topics = i, main = paste0(topic$topicnames[i]," - FREX"),
labeltype = "frex", width = 55)
}
#shortdoc <- substr(subset(fbData$ID,out$meta$ID),1,200))
#thoughts3 <- findThoughts(stmFit, texts = shortdoc, n = 2, topics = 2)
par(mfrow = c(1,1),mar = c(2, 2, 2, 2))
topicQuality(stmFit,
documents = out$documents,
main = "Topic Interpretability: Exclusivity and Semantic Coherence")
## [1] -117.69762 -99.02861 -108.36939 -113.10056 -119.04781 -83.70658
## [7] -114.95785 -106.45754 -94.32849 -83.56333 -60.37099 -107.18429
## [13] -85.71741 -96.02803 -97.72806 -96.74052 -133.55053 -109.95572
## [19] -131.66939 -68.16150 -170.58757 -131.58723 -89.55902 -85.80290
## [25] -137.62628 -63.21635 -80.50761 -141.80216 -144.41775 -155.46163
## [31] -106.00173 -145.81977 -82.39580 -84.88564 -156.92077 -49.98792
## [37] -141.26560 -126.25005 -165.69881 -129.67554
## [1] 9.835140 9.854379 9.658130 9.788529 9.958288 9.982604 9.874494
## [8] 9.775709 9.805462 9.818819 9.903764 9.816718 9.858738 9.807711
## [15] 9.940360 9.678566 9.849708 9.946512 9.778923 9.777436 9.881158
## [22] 9.791882 9.689455 9.664901 9.883178 9.892371 9.693516 9.778924
## [29] 9.574346 9.878137 9.909861 9.706640 9.881069 9.860808 9.713992
## [36] 9.889008 9.734380 9.794439 9.742642 9.959924
prep <- estimateEffect(1:k ~ Party + s(Time), stmFit, meta = out$meta, uncertainty = "Global")
Result <- plot.estimateEffect(prep, "Party", method = "difference",
cov.value1 = "Democratic", cov.value2 = "Republican",
verbose.labels = F,
ylab = "Expected Difference in Topic Probability by Party \n (with 95% Confidence Intervals)",
xlab = "More Likely Republican Not Significant More Likely Democratic",
main = "Effect of Party on Topic Prevelance for Facebook Posts of NC State Senators",
xlim = c(-0.08,0.08))
# order based on Expected Topic Proportion
rank = order(unlist(Result$means))
topic <- topic[rank,]
par(mfrow = c(1,1),mar = c(6, 6, 4, 4))
STMresults <- plot.estimateEffect(prep, "Party", method = "difference", cov.value1 = "Democratic",
cov.value2 = "Republican",
topics = topic$TopicNumber,
verbose.labels = F,
ylab = "Expected Difference in Topic Probability by Party \n (with 95% Confidence Intervals)",
labeltype = "custom",
xlab = "More Likely Republican Not Significant More Likely Democratic",
custom.labels = topic$topicnames,
main = "Effect of Party on Topic Prevelance for Facebook Posts of NC State Senators",
xlim = c(-.08,0.08))
# time
par(mfrow = c(2,2),mar = c(4,4,2,2))
for (i in 1:k){
plot.estimateEffect(prep, "Time", method = "continuous", topics = rank[i], model = z,
main = paste0(topic$topicnames[i],": Topic ",rank[i]),
printlegend = FALSE, ylab = "Exp. Topic Prob",
xlab = "Time (Month, 1 = Jan 2015 to 24 = Dec 2016)", ylim = c(-0.01,0.2)
)
}
# Pres Election and Hillary Clinton
plot.STM(stmFit, type = "perspectives", topics = c(11,29), n=30, plabels = c("Presidential Election","Hillary Clinton"))
Let’s create an interactive network for the topics (nodes) in which an edge represents a significant correlation between the topic. The size of the node is the prevalence (expected topic proportion) for the topic.
library(igraph); library(visNetwork)
par(mfrow = c(1,1))
mod.out.corr <- topicCorr(stmFit, cutoff = .025)
#library(corrplot)
#corrplot(mod.out.corr$cor, order="hclust", hclust.method="ward.D2", method = "circle", type = "lower", diag = F)
#mod.out.corr <- topicCorr(stmFit, method = "huge")
links2 <- as.matrix(mod.out.corr$posadj)
net2 <- graph_from_adjacency_matrix(links2, mode = "undirected")
table(V(net2)$type)
## < table of extent 0 >
net2 <- simplify(net2, remove.multiple = F, remove.loops = T)
links <- as_data_frame(net2, what="edges")
nodes <- as_data_frame(net2, what="vertices")
# Community Detection
clp <- cluster_label_prop(net2)
nodes$community <- clp$membership
means <- as.data.frame(unlist(STMresults$means))
colnames(means) <- "means"
color <- colorRamp(c("white","blue"))(abs(means$means)/0.05)
means$colorDem <- rgb(color[,1],color[,2],color[,3], maxColorValue=255)
color <- colorRamp(c("white","red"))(abs(means$means)/0.05)
means$colorRep <- rgb(color[,1],color[,2],color[,3], maxColorValue=255)
means$color <- ifelse(means$means>0,means$colorDem,means$colorRep)
#visNetwork edits
nodes$shape <- "dot"
nodes$shadow <- TRUE # Nodes will drop shadow
nodes$title <- apply(topicNames$prob, 1, function(x) paste0(x, collapse = " + "))[rank] # Text on click
nodes$label <- topic$topicnames # Node label
nodes$size <- (topic$TopicProportions / max(topic$TopicProportions)) * 40 # Node size
nodes$borderWidth <- 2 # Node border width
nodes$color.background <- means$color
nodes$color.border <- "black"
nodes$color.highlight.background <- "orange"
nodes$color.highlight.border <- "darkred"
nodes$id <- topic$TopicNumber
visNetwork(nodes, links, width="100%", height="600px", main="NC State Senator Topic (Correlation) Network") %>% visOptions(highlightNearest = list(enabled = TRUE, algorithm = "hierarchical")) %>%
visInteraction(navigationButtons = TRUE)